/*
Copyright 2008-2009 Elöd Egyed-Zsigmond, Cyril Laitang
Copyright 2009-2011 Samuel Gesche

This file is part of IPRI News Analyzer.

IPRI News Analyzer is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

IPRI News Analyzer is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with IPRI News Analyzer.  If not, see <http://www.gnu.org/licenses/>.
*/

package proc.rss;

import data.base.connectors.RSSFeedDatabase;
import data.base.Database;
import data.base.Config;
import data.base.NoBaseException;
import data.structures.rss.BaseRSSItem;

import proc.text.Codecs;
import proc.text.Out;

import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
import java.util.regex.Pattern;
import java.util.regex.Matcher;

/**
 * Reads the Google News RSS feed, queues the items that are new with respect
 * to the database, and stores them from a background worker thread.
 *
 * <p>{@link #readNews()} is the producer: it parses the feed and queues the
 * accepted items. The worker thread created in the constructor (and launched
 * by {@link #start()}) is the consumer: it repeatedly takes the oldest queued
 * item, resolves its article URLs with {@link #parseNews(String)} and stores
 * the subject through {@link RSSFeedDatabase}. The queue is guarded by a
 * private lock; the worker waits on that lock and is notified on insertion.
 */
public class NewsSaver {
    /** Feed polled by {@link #readNews()}. */
    private static final String GOOGLE_NEWS_URL = "http://news.google.com/news?pz=1&cf=all&ned=fr&" +
            "topic=h&num=50&output=rss";

    /** Matches the quoted "/story" URLs embedded in an item description. */
    private static final Pattern STORY_URL =
            Pattern.compile("\\\"http://news\\.google\\.com/news/story.*?\\\"");

    /** Maximum time the worker waits for new entries before re-checking the queue. */
    private static final long POLL_DELAY_MS = 5000L;

    /**
     * Minimum gap required between two dates for one to count as "after" the
     * other. Used instead of Date.after(Date) because dates that should be
     * equal are occasionally a few milliseconds apart.
     */
    private static final long DATE_TOLERANCE_MS = 1000L;

    private final Database baseIPRI;

    /** Entries waiting to be stored; every access must hold {@link #lock}. */
    private final Set<Entree> entrees = new HashSet<Entree>();
    private final Object lock = new Object();

    /** Background worker that drains {@link #entrees}. */
    private final Thread traitement;

    // Number of new entries accepted so far by readNews().
    private int totalItemCount = 0;

    // Written by both the caller of readNews() and the worker (via parseNews).
    private volatile String readErrorMessage = "";

    /**
     * Creates the saver and its worker thread. The worker is not started
     * until {@link #start()} is called.
     *
     * @param base database the feed connector is built on
     */
    public NewsSaver(Database base) {
        baseIPRI = base;
        traitement = new Thread() {

                // true while an entry is being processed; exposed by toString().
                private volatile boolean enCours = false;

                @Override
                public void run() {
                    RSSFeedDatabase rS;
                    try {
                        rS = new RSSFeedDatabase(baseIPRI);
                    } catch (NoBaseException nbe) {
                        // Without a database the worker cannot do anything useful.
                        Out.printErreur("Sauvegarde des sujets impossible : " + nbe);
                        return;
                    }
                    while (!isInterrupted()) {
                        Entree e;
                        try {
                            e = attendEntree();
                        } catch (InterruptedException ie) {
                            // Genuine stop request: restore the flag and leave.
                            interrupt();
                            break;
                        }
                        if (e == null) {
                            // Wait timed out with nothing queued.
                            continue;
                        }
                        enCours = true;
                        try {
                            stocke(rS, e.getEntree());
                        } catch (Exception ex) {
                            // One failing entry must not stop the worker for good.
                            Out.printErreur("Erreur lors du stockage d'un sujet : " + ex);
                        } finally {
                            enCours = false;
                        }
                    }
                }

                @Override
                public String toString() {
                    return "" + enCours;
                }
            };
    }

    /** Starts the background worker thread. */
    public void start() {
        traitement.start();
    }

    /**
     * Blocks until an entry is available or the polling delay elapses.
     *
     * @return the oldest queued entry, or {@code null} if none arrived in time
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private Entree attendEntree() throws InterruptedException {
        synchronized (lock) {
            if (entrees.isEmpty()) {
                lock.wait(POLL_DELAY_MS);
            }
            return pioche();
        }
    }

    /**
     * Stores one feed item if it is still new with respect to the database.
     *
     * @param rS    database connector used to read the last date and insert
     * @param entry feed item to process
     * @throws Exception whatever the connector or the item accessors raise;
     *                   the caller logs it and moves on to the next entry
     */
    private void stocke(RSSFeedDatabase rS, BaseRSSItem entry) throws Exception {
        // Re-read the last stored date: earlier insertions may have moved it.
        Date lastDate = rS.getNewsLastEntryDate();
        if (!isToInsert(entry, lastDate)) {
            return;
        }
        String title = entry.getTitle();
        BaseRSSItem[] urls = parseNews(entry.getDescription());
        Date pubDate = entry.getPubDate();
        String titreLisible = Codecs.desEscapeHTML(Codecs.escapeHTML(title));
        if (urls.length == 0) {
            Out.printErreur("Pas d'url pour le sujet " + titreLisible);
        } else {
            Out.printInfo("Stockage du sujet : " + titreLisible +
                    " (" + (urls.length) + " URL, daté du " + pubDate + ")...");
            rS.insereSujet(title, urls, pubDate);
        }
    }

    /**
     * Removes and returns the queued entry with the earliest date.
     *
     * @return the oldest entry, or {@code null} when the queue is empty
     */
    private Entree pioche() {
        synchronized (lock) {
            Iterator<Entree> i = entrees.iterator();
            if (!i.hasNext()) {
                return null;
            }
            Entree oldest = i.next();
            while (i.hasNext()) {
                Entree candidate = i.next();
                if (candidate.getDate().before(oldest.getDate())) {
                    oldest = candidate;
                }
            }
            entrees.remove(oldest);
            return oldest;
        }
    }

    /**
     * Reads the feed and queues every item that passes
     * {@link #isToInsert(BaseRSSItem, Date)} for storage by the worker.
     *
     * @return the number of items queued, or -1 if the feed could not be
     *         parsed (the reason is then available from
     *         {@link #getReadErrorMessage()})
     * @throws NoBaseException if the database connector cannot be created
     */
    public int readNews() throws NoBaseException {
        RSSFeedDatabase rS = new RSSFeedDatabase(baseIPRI);
        int newItemCount = 0;
        try {
            BaseRSSItem[] entryList = RSSParser.lectureNews(GOOGLE_NEWS_URL);

            Vector<BaseRSSItem> entries = new Vector<BaseRSSItem>();
            Date lastDate = rS.getNewsLastEntryDate();
            for (int i = 0; i < entryList.length; i++) {
                if (isToInsert(entryList[i], lastDate)) {
                    entries.addElement(entryList[i]);
                    newItemCount++;
                    totalItemCount++;
                }
            }
            // Queue once, after the scan, so that no item is queued twice.
            if (!entries.isEmpty()) {
                BaseRSSItem[] liste = new BaseRSSItem[entries.size()];
                entries.toArray(liste);
                insert(liste);
            }
        } catch (RSSParsingException e) {
            newItemCount = -1;
            readErrorMessage = e.getMessage();
        }
        return newItemCount;
    }

    /**
     * Queues the given items and wakes the worker up.
     *
     * @param entries items to queue
     */
    private void insert(BaseRSSItem[] entries) {
        synchronized (lock) {
            for (int i = 0; i < entries.length; i++) {
                entrees.add(new Entree(entries[i]));
            }
            // Wake the worker without interrupting work it may have in progress.
            lock.notifyAll();
        }
    }

    /**
     * Tells whether a feed item should be stored, based on its publication date.
     *
     * @param entry    feed item to check
     * @param lastDate date of the most recent entry already stored
     * @return {@code true} if the item should be stored; {@code false} if it
     *         should not or if it has no date
     */
    boolean isToInsert(BaseRSSItem entry, Date lastDate) {
        boolean b = false;
        try {
            b = isToInsert(entry.getPubDate(), lastDate);
        } catch (NoDateException nde) {
            // Not expected with items coming from RSSParser, which already
            // rejects many malformed items itself; treat as "do not insert".
        }
        return b;
    }

    /**
     * Tells whether a publication date qualifies for storage: it must be at
     * least one second after both {@code lastDate} and the corpus start date
     * from {@link Config}, and must not lie in the future.
     *
     * @param entryPubDate publication date to check
     * @param lastDate     date of the most recent entry already stored;
     *                     {@code null} is treated as "nothing stored yet"
     *                     (NOTE(review): confirm that the database connector
     *                     can actually return null)
     * @return {@code true} if the date qualifies
     * @throws NoDateException if {@code entryPubDate} is {@code null}
     */
    boolean isToInsert(Date entryPubDate, Date lastDate) throws NoDateException {
        Date limite = new Date(Config.getDateDepartCorpus());
        if (entryPubDate == null) {
            throw new NoDateException();
        }
        long pub = entryPubDate.getTime();
        boolean afterLast = lastDate == null
                || pub - lastDate.getTime() >= DATE_TOLERANCE_MS;
        boolean afterLimit = pub - limite.getTime() >= DATE_TOLERANCE_MS;
        // Some articles are post-dated; reject anything dated in the future.
        boolean notInFuture = pub < System.currentTimeMillis();
        return afterLast && afterLimit && notInFuture;
    }

    /**
     * Returns the message of the last feed-reading error.
     *
     * @return the last error message, or an empty string if none was recorded
     */
    public String getReadErrorMessage() {
        return readErrorMessage;
    }

    /**
     * Extracts the "/story" URLs from an item description and fetches the
     * articles of each corresponding subject.
     *
     * @param texte item description to scan; {@code null} yields no result
     * @return the articles found whose link is not empty (possibly none)
     */
    public BaseRSSItem[] parseNews(String texte) {
        Vector<BaseRSSItem> urls = new Vector<BaseRSSItem>();
        if (texte == null) {
            return new BaseRSSItem[0];
        }

        // Process each /story URL found in the description.
        Matcher m2 = STORY_URL.matcher(texte);
        while (m2.find()) {
            String urlGoogle = m2.group().replaceAll("\"", "");
            // The subject code is whatever follows the "ncl=" parameter.
            String codeSujet = urlGoogle.replaceAll(".*?ncl=", "");
            try {
                BaseRSSItem[] articles = RSSParser.trouveArticlesSujet(codeSujet);
                for (int i = 0; i < articles.length; i++) {
                    if (!(articles[i].getLink().equals(""))) {
                        urls.addElement(articles[i]);
                    }
                }
            } catch (RSSParsingException rpe) {
                readErrorMessage = "impossible de récupérer le flux étendu";
            }
        }
        BaseRSSItem[] res = new BaseRSSItem[urls.size()];
        urls.toArray(res);
        return res;
    }
}
